library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)
library(rvest)
library(RSelenium)
library(seleniumPipes)
library(dLagM)
library(jsonlite)
library(rgdal)
library(esri2sf)
library(readr)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

# Load the processing functions. This also loads the normalization functions.
source("safegraph_process_patterns_functions.R")

# SET your path here to the covid19analysis folder.
sg_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/'

# SET your path here to the github covid folder
github_path  <- ''

Comparing different metrics that assess social distancing. First load the data.

scc_blockgroups <-
  block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

sf_blockgroups <- 
  block_groups("CA","San Francisco", cb=F, progress_bar=F) %>% 
  st_transform('+proj=longlat +datum=WGS84')

# zip code areas
zctas_94 <- 
  zctas(cb = F, starts_with = "94") %>% 
  st_transform('+proj=longlat +datum=WGS84') %>%
  dplyr::select(ZCTA5CE10, geometry)

zctas_95 <- 
  zctas(cb = F, starts_with = "95") %>% 
  st_transform('+proj=longlat +datum=WGS84') %>%
  dplyr::select(ZCTA5CE10, geometry)

zctas_94_95 <- rbind(zctas_94, zctas_95)

# join with block groups
scc_bg_zctas <- scc_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94_95) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

sf_bg_zctas <- sf_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

# Safegraph social distancing data
bay_sd <- readRDS("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing_v2.rds") %>% 
  mutate(date = date_range_start %>%  substr(1,10) %>% as.Date())

# patterns data
sfc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-03-09_origins_daily.rds"))

sfc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/sfc_patterns_2020-03-16_origins_daily.rds"))

marin_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/mac_patterns_2020-03-09_origins_daily.rds"))

smc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/smc_patterns_2020-03-09_origins_daily.rds"))

scc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/scc_patterns_2020-03-09_origins_daily.rds"))

ccc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/ccc_patterns_2020-03-09_origins_daily.rds"))

alc_patterns_origins_daily_week_pre <- readRDS(paste0(sg_path,"weekly-patterns/v2/alc_patterns_2020-03-09_origins_daily.rds"))

marin_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/mac_patterns_2020-03-16_origins_daily.rds"))

smc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/smc_patterns_2020-03-16_origins_daily.rds"))

scc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/scc_patterns_2020-03-16_origins_daily.rds"))

ccc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/ccc_patterns_2020-03-16_origins_daily.rds"))

alc_patterns_origins_daily_week1 <- readRDS(paste0(sg_path,"weekly-patterns/v2/alc_patterns_2020-03-16_origins_daily.rds"))

# census data processing
acs_vars = readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/censusData2018_acs_acs5.rds")

sf_fips <- fips("CA", "San Francisco") %>% substr(3,5)

# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
    regionString <- paste0("state:06+county:", county)
    censusData <- getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "block group:*", 
      regionin = regionString,
      vars = variableToPull
    ) %>%
    mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
      select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
  
  return(censusData)
}

For now, just going to look at SF block groups. Also filter social distancing data to be in the date ranges of the patterns data.

sfc_patterns_origins_daily_week_pre_in_sf <- sfc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

sfc_patterns_origins_daily_week1_in_sf <- sfc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

marin_patterns_origins_daily_week1_in_sf <- marin_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

marin_patterns_origins_daily_week_pre_in_sf <- marin_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

smc_patterns_origins_daily_week1_in_sf <- smc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

smc_patterns_origins_daily_week_pre_in_sf <- smc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

scc_patterns_origins_daily_week1_in_sf <- scc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

scc_patterns_origins_daily_week_pre_in_sf <- scc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

ccc_patterns_origins_daily_week1_in_sf <- ccc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

ccc_patterns_origins_daily_week_pre_in_sf <- ccc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

alc_patterns_origins_daily_week1_in_sf <- alc_patterns_origins_daily_week1 %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

alc_patterns_origins_daily_week_pre_in_sf <- alc_patterns_origins_daily_week_pre %>%
  filter(origin_census_block_group %in% sf_bg_zctas$blockgroup)

combined_sf_week_pre_1_by_date <- rbind(sfc_patterns_origins_daily_week_pre_in_sf, sfc_patterns_origins_daily_week1_in_sf, marin_patterns_origins_daily_week1_in_sf, marin_patterns_origins_daily_week_pre_in_sf, smc_patterns_origins_daily_week1_in_sf, smc_patterns_origins_daily_week_pre_in_sf, scc_patterns_origins_daily_week1_in_sf, scc_patterns_origins_daily_week_pre_in_sf, ccc_patterns_origins_daily_week1_in_sf, ccc_patterns_origins_daily_week_pre_in_sf, alc_patterns_origins_daily_week1_in_sf, alc_patterns_origins_daily_week_pre_in_sf) %>%
  group_by(origin_census_block_group, date) %>%
  summarize(visit_counts_high_zip = sum(visit_counts_high), visit_counts_low_zip = sum(visit_counts_low)) %>%
  filter(!is.na(visit_counts_high_zip)) %>%
  mutate(visit_counts_avg = (visit_counts_high_zip + visit_counts_low_zip)/2)

sf_sd_pre_1 <- bay_sd %>%
  filter(origin_census_block_group %in% sf_blockgroups$GEOID) %>% 
  filter(date >= min(combined_sf_week_pre_1_by_date$date) & date <= max(combined_sf_week_pre_1_by_date$date)) %>%
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode), by = c("origin_census_block_group" = "blockgroup")) %>%
  filter(!is.na(zipcode)) %>% 
  group_by(origin_census_block_group, date) %>%
  summarize(total_at_home = sum(completely_home_device_count), total_devices = sum(device_count), dwell_time = sum(median_non_home_dwell_time), distance_traveled = sum(distance_traveled_from_home)) %>%
  mutate(
    percent_at_home = total_at_home*100/total_devices,
    percent_leaving_home = (100 - percent_at_home)
  )

# join these 
sf_sd_visits_pre_1 <- left_join(combined_sf_week_pre_1_by_date, sf_sd_pre_1)

Get population data.

sf_pop_block <- pullCensus("B01003_001E", sf_fips) %>% 
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  rename(total_pop = B01003_001E)

sf_sd_visits_pre_1 <- sf_sd_visits_pre_1 %>% left_join(sf_pop_block, by = c("origin_census_block_group" = "blockgroup")) %>% 
  mutate(visits_per_capita = visit_counts_avg/ total_pop, dwell_per_capita = dwell_time/total_pop, distance_per_capita = distance_traveled/total_pop)

sf_sd_visits_pre_1_zip <- sf_sd_visits_pre_1 %>%
  group_by(zipcode, date) %>%
  summarize(total_at_home = sum(total_at_home), total_devices = sum(total_devices), dwell_time = sum(dwell_time), distance_traveled = sum(distance_traveled), visit_counts_avg = sum(visit_counts_avg), total_pop_zip = sum(total_pop)) %>%
  mutate(
    percent_at_home = total_at_home*100/total_devices,
    percent_leaving_home = (100 - percent_at_home), 
    visits_per_capita = visit_counts_avg/ total_pop_zip, 
    dwell_per_capita = dwell_time/total_pop_zip, 
    distance_per_capita = distance_traveled/total_pop_zip
  ) %>%
  filter(!is.na(zipcode))
fig_test <- sf_sd_visits_pre_1_zip %>% filter(date == min(date)) %>% plot_ly() %>%
  add_trace(x = ~zipcode, y = ~percent_leaving_home/10, type = 'scatter', mode = 'markers',name = "percent leaving/10") %>%
  add_trace(x = ~zipcode, y = ~visits_per_capita, type = 'scatter', mode = 'markers',name = "visits per capita") %>%
  add_trace(x = ~zipcode, y = ~dwell_per_capita, type = 'scatter', mode = 'markers',name = "dwell per capita") %>%
  add_trace(x = ~zipcode, y = ~distance_per_capita, type = 'scatter', mode = 'markers',name = "distance per capita")

fig_test

Mapping for a specific day.

sf_sd_visits_3_9_geom <- sf_sd_visits_pre_1 %>% filter(date == min(date)) %>% st_as_sf() %>% ungroup()

mapview(sf_sd_visits_3_9_geom %>% dplyr::select(percent_leaving_home))
mapview(sf_sd_visits_3_9_geom %>% dplyr::select(visits_per_capita))
# mapview(sf_sd_visits_3_9_geom %>% dplyr::select(distance_per_capita))

# mapview(sf_sd_visits_3_9_geom %>% dplyr::select(dwell_per_capita))

# by zip code
sf_sd_visits_3_9_geom_zip <- sf_sd_visits_pre_1_zip %>% filter(date == min(date)) %>% 
  left_join(zctas_94, by = c("zipcode" = "ZCTA5CE10")) %>%
  st_as_sf() %>% ungroup()

mapview(sf_sd_visits_3_9_geom_zip %>% dplyr::select(percent_leaving_home))
mapview(sf_sd_visits_3_9_geom_zip %>% dplyr::select(visits_per_capita))
# mapview(sf_sd_visits_3_9_geom_zip %>% dplyr::select(distance_per_capita))
# 
# mapview(sf_sd_visits_3_9_geom_zip %>% dplyr::select(dwell_per_capita))